/**************************************************************************
	Replication do-file: "Corruption in Customs"
	Cyril Chalendard, Ana Fernandes, Gael Raballand and Bob Rijkers
	
	Created on: 01/07/2022
**************************************************************************/

clear all
set more off, perm
cap log close
cls

* ----------------------- DIRECTORIES AND FOLDERS ----------------------- *

* Directories in which data are stored
global main "PUT YOUR DIRECTORY PATH HERE"
cd "$folder"

* Folders
global outputdata = "$main\Output Data"
cap mkdir "$main\Tables"
global tables = "$main\Tables"

* ----------------------------- BEGINS HERE ----------------------------- *

* -------
* Dataset
* -------
use "$outputdata\CFRR.dta", clear // open dataset

* Sample
keep if sample_int==1 & eis_f!=. // keep observations of interest


* ---------
* Table A25
* ---------

* Panel A
* -------

preserve

* Matrix
mat tableA25_1 = J(16,3,.)
mat tableA25_1T = J(1,3,.)

* Dataset
collapse (mean) sig_c_f age male management economics law tenure sample_int wfr if sample_reg==1 & nc_sem<11 & eis_f!=. , by(is insp_f nc_sem) // collapse keeping means by inspector-semester

* New variables
gen logtenure = log(tenure) // tenure's logarithm
egen msig_c_f = mean(sig_c_f) , by(insp_f) // significant excess interaction mean by inspector
egen mtenure = mean(tenure) , by(insp_f) // mean tenure by inspector
gen logmtenure = log(mtenure) // tenure's mean logarithm
gen mitd1=(mtenure<=2) // indicator for tenure < 2
gen mitd2=(mtenure>2 & mtenure<=4) // indicator for tenure between 2 and 4
gen mitd3=(mtenure>4 & mtenure<=6) // indicator for tenure between 4 and 6

* Label variables
local vars `" "logtenure" "male" "age" "economics" "management" "law" "msig_c_f" "mitd1" "mitd2" "mitd3" "'
local labels `" "Tenure (log)" "Male" "Age" "Economics degree" "Management degree" "Law degree" "Average significant excess interaction" "Aveage tenure 1 year or less" "Average tenure: 1-2 years" "Average tenure: 2-3 years" "'
forvalues i = 1(1)10 {
	
	local x: word `i' of `vars'
	local y: word `i' of `labels'
	
	label var `x' "`y'"
}

* Dataset
sort insp_f nc_sem // sort observations
bys insp_f: gen count = _n // create the count of inspectors

* Globals
global indep1 "male mitd2 mitd3"
global indep2 "male mitd2 mitd3 age"
global indep3 "male mitd2 mitd3 age management economics law"

* Regressions
** Column 1
qui: reg msig_c_f $indep1 if count==1 , cluster(insp_f)
local f = 1
forvalues i = 1(1)3 {

	local x: word `i' of $indep1

	mat tableA25_1[`f',1] = _b[`x']
	local f = `f' + 1
	mat tableA25_1[`f',1] = _se[`x']
	local f = `f' + 1
}
mat tableA25_1T[1,1] = e(N)
mat tableA25_1[16,1] = e(r2)
** Column 2
qui: reg msig_c_f $indep2 if count==1 , cluster(insp_f)
local f = 1
forvalues i = 1(1)4 {

	local x: word `i' of $indep2

	mat tableA25_1[`f',2] = _b[`x']
	local f = `f' + 1
	mat tableA25_1[`f',2] = _se[`x']
	local f = `f' + 1
}
mat tableA25_1T[1,2] = e(N)
mat tableA25_1[16,2] = e(r2)
** Column 3
qui: reg msig_c_f $indep3 if count==1 , cluster(insp_f)
local f = 1
forvalues i = 1(1)7 {

	local x: word `i' of $indep3

	mat tableA25_1[`f',3] = _b[`x']
	local f = `f' + 1
	mat tableA25_1[`f',3] = _se[`x']
	local f = `f' + 1
}
mat tableA25_1T[1,3] = e(N)
mat tableA25_1[16,3] = e(r2)

restore


* Panel B
* -------

preserve

* Matrix
mat tableA25_2 = J(12,3,.)
mat tableA25_2T = J(1,3,.)

* Sample
keep if sample_int // keep observations of interest

* New variables
** Broker market share
gen n = cond(key_dec!="" & sample_reg==1 & eis_f!=., 1, .) // constant if conditions are fulfilled
egen t = sum(n) if sample_reg==1 & eis_f!=. , by(nc_sem) // total by semester
egen t_bms = sum(n) if sample_reg==1 & eis_f!=., by(bs) // total by broker
gen bmsh = t_bms/t // broker market share in 21To (time varying)
** Broker tenure (in our sample)
gen b_tenure = year_reg - broker_startyear // tenure in years

* Dataset		
collapse (mean) sig_c_f toa ob bmsh b_tenure sample_int wfr nc_sem if sample_reg==1 & nc_sem<11 & eis_f!=. , by(key_dec bs) // collapse keeping means by broker-semester
sort key_dec nc_sem // sort observations
bys key_dec: gen count = _n // create the count of brokers

* New variables
egen Asig_c_f = mean(sig_c_f) , by(key_dec) // significant excess interaction mean by broker
egen Abms = mean(bmsh) , by(key_dec)	// market share mean by broker
egen Ab_tenure = mean(b_tenure), by(key_dec) // mean tenure by broker
gen abtd3 = (Ab_tenure>5 & Ab_tenure<=10) // indicator for tenure between 5 and 10
gen abtd4 = (Ab_tenure>10) // indicator for tenure over 10

* Label variables
local vars `" "Asig_c_f" "Abms" "Ab_tenure" "ob" "toa" "abtd3" "abtd4" "'
local labels `" "Average significant excess interaction" "Market share (average)" "Mean tenure (in years)" "Logarithm of the tenures mean (in years)" "Broker=self" "Toamasina" "Broker tenure >5-10 years" "Broker tenure 10 plus years" "'
forvalues i = 1(1)7 {
	
	local x: word `i' of `vars'
	local y: word `i' of `labels'
	
	label var `x' "`y'"
}

* Globals
global indep2 "toa ob"
global indep3 "toa ob Abms abtd3 abtd4"

* Regressions
** Column 1
qui: reg Asig_c_f toa if count==1 , cluster(key_dec)
mat tableA25_2[1,1] = _b[toa]
mat tableA25_2[2,1] = _se[toa]
mat tableA25_2T[1,1] = e(N)
mat tableA25_2[12,1] = e(r2)
** Column 2
qui: reg Asig_c_f $indep2 if count==1 , cluster(key_dec)
local f = 1
forvalues i = 1(1)2 {

	local x: word `i' of $indep2

	mat tableA25_2[`f',2] = _b[`x']
	local f = `f' + 1
	mat tableA25_2[`f',2] = _se[`x']
	local f = `f' + 1
}
mat tableA25_2T[1,2] = e(N)
mat tableA25_2[12,2] = e(r2)
** Column 3
qui: reg Asig_c_f $indep3 if ob!=. & toa!=. & count==1 , cluster(key_dec)
local f = 1
forvalues i = 1(1)5 {

	local x: word `i' of $indep3

	mat tableA25_2[`f',3] = _b[`x']
	local f = `f' + 1
	mat tableA25_2[`f',3] = _se[`x']
	local f = `f' + 1
}
mat tableA25_2T[1,3] = e(N)
mat tableA25_2[12,3] = e(r2)

restore


* Export
* ------
putexcel set "$tables\Table A25.xlsx", replace sheet(TableA25) // create a new excel spreadsheet

* Dependent variables
putexcel A1 = "Dependent variable:", left vcenter bold
putexcel (B1:D1), merge hcenter vcenter bold
putexcel B1 = "Average excess interaction share"

* Panel A
** Name
putexcel (A3:D3), merge hcenter vcenter bold
putexcel A3 = "Panel A. Inspectors"
** Independent variables
local names `" "Male" "Average tenure: 1-2 years" "Average tenure: 2-3 years" "Age" "Management degree" "Economics degree" "Law degree" "'
local j = 5
forvalues i = 1(1)7 {
	
	local x: word `i' of `names'
	
	putexcel A`j' = "`x'", left vcenter
	local j = `j'+2
}
putexcel A19 = "Observations", left vcenter
putexcel A20 = "R-squared", left vcenter
** Coefficients
putexcel B5 = matrix(tableA25_1), nformat(0.000) hcenter vcenter
putexcel B19 = matrix(tableA25_1T), nformat(number_sep) hcenter vcenter

* Panel B
** Name
putexcel (A22:D22), merge hcenter vcenter bold
putexcel A22 = "Panel B: Brokers"
** Independent variables
local names `" "Based in Toamasina" "Importer acting as own broker" "Average market share" "Broker tenure: 5-10 years" "Broker tenure: more than 10 years" "'
local j = 24
forvalues i = 1(1)5 {
	
	local x: word `i' of `names'
	
	putexcel A`j' = "`x'", left vcenter
	local j = `j'+2
}
putexcel A34 = "Observations", left vcenter
putexcel A35 = "R-squared", left vcenter
** Coefficients
putexcel B24 = matrix(tableA25_2), nformat(0.000) hcenter vcenter
putexcel B34 = matrix(tableA25_2T), nformat(number_sep) hcenter vcenter

* Column numbers
local cells `" "B" "C" "D" "B" "C" "D" "'
forvalues i = 1(1)6 {

	local y: word `i' of `cells'
	
	if `i' < 4 {
		putexcel `y'4 = "(`i')", hcenter vcenter
	}
	
	if `i' > 3 {
		putexcel `y'23 = "(`i')", hcenter vcenter
	}
}


* -------------------------------- ENDS HERE -------------------------------- *